********************************************************************************
* Session setup and user-specific paths.
* Every file read or written below is addressed as "${MY_IN_PATH}/<name>" or
* "${MY_OUT_PATH}/<name>", so the path globals must NOT end in a slash.
********************************************************************************
clear all
set maxvar 30000
version 14
capture log close
set more off

****************************************************************************************************
* -----   Customize the paths and options:   -----
****************************************************************************************************
*cd  "Data\"

global MY_IN_PATH   "/Users/ben/Dropbox/RnD_tax_credit/ReStat_repl_package/Data"
global MY_OUT_PATH  "/Users/ben/Dropbox/RnD_tax_credit/ReStat_repl_package/Data"
*global MY_TEMP_PATH "..."

* Join directory and file name with an explicit "/" (the directory globals carry
* no trailing separator) and quote the value so paths containing spaces survive.
global MY_OUT_FILE  "${MY_OUT_PATH}/out.dta"
global MY_LOG_FILE  "${MY_OUT_PATH}/cr_out.log"

* Uncomment to keep a text log of the run.
*log using "${MY_LOG_FILE}", text replace
****************************************************************************************************
* import science data
*************************************************************************************************


* Build a patent-level file of science-citation counts keyed by numeric patent number.
*   reads : ${MY_IN_PATH}/_pcs_countsbypatent.csv
*   writes: ${MY_IN_PATH}/patent_science_bcites20231221.dta
import delimited "${MY_IN_PATH}/_pcs_countsbypatent.csv", clear

* Keep only ids whose first two characters are "us".
gen country = substr(patent, 1, 2)
keep if country == "us"

* Take the whole run of digits that starts at position 4 (after "us" plus one
* separator character). A fixed 7-character slice would truncate 8-digit patent
* numbers (10,000,000 and above) to a wrong 7-digit number.
* NOTE(review): assumes ids look like "us-<digits>[...]" -- confirm against the CSV.
gen patentno = regexs(1) if regexm(patent, "^us.([0-9]+)")
destring patentno, gen(patent_) force

* Ids with no numeric part after the prefix have no usable key.
drop if missing(patent_)

* A numeric patent number that appears more than once is ambiguous: drop every copy.
duplicates tag patent_, gen(d)
keep if d == 0

* Replace the string id with the numeric key under the name "patent".
drop patent patentno country d
rename patent_ patent

compress
save "${MY_IN_PATH}/patent_science_bcites20231221.dta", replace


* Count backward citations per citing patent, restricted to citation pairs in
* which BOTH the citing and the cited patent match a gvkey in patent_gvkey.dta.
*   bcites_pv  : number of such citations made by the patent
*   sbcites_pv : number of those where citing and cited gvkey are equal (self-cites)
* Leaves one row per citing patent in memory, keyed by "patent".
import delimited using "${MY_IN_PATH}/uspatentcitation.tsv", clear
keep patent_id citation_id
rename patent_id citing
rename citation_id cited

* Patent ids that are not purely numeric become missing and are dropped:
* they cannot match the numeric key used in the merges below.
destring cited, force replace
drop if missing(cited)
destring citing, force replace
drop if missing(citing)

* merge citing gvkey
rename citing patent
merge m:1 patent using "${MY_IN_PATH}/patent_gvkey.dta"
keep if _merge == 3
drop _merge
rename gvkey gvkey_citing
rename patent citing

* merge cited gvkey
rename cited patent
merge m:1 patent using "${MY_IN_PATH}/patent_gvkey.dta"
keep if _merge == 3
drop _merge
rename gvkey gvkey_cited
rename patent cited

gegen bcites_pv = count(1), by(citing)

* Sum a 0/1 self-cite indicator so the result is defined on EVERY row of the
* patent (0 when there are no self-cites). Counting with an -if- qualifier
* leaves the non-self-cite rows missing, and the one-row-per-patent step below
* would then keep a missing or non-missing value depending on arbitrary sort order.
gen byte selfcite = gvkey_citing == gvkey_cited
gegen sbcites_pv = sum(selfcite), by(citing)

keep citing bcites_pv sbcites_pv
rename citing patent

* Both counts are constant within patent, so any single row per patent is equivalent.
bysort patent: keep if _n == 1
compress

* Attach science-citation counts and the patent master file to the per-patent
* citation counts in memory, then aggregate to gvkey x year totals and shares.
*   writes: ${MY_OUT_PATH}/bcites_20231228.dta

* merge science bcites (match status is kept as mscites)
merge 1:1 patent using "${MY_IN_PATH}/patent_science_bcites20231221.dta"
rename _merge mscites

* Forward slashes are used as directory separators: backslashes are
* Windows-only, while "/" works on every platform.
* NOTE(review): the extra "RnD_tax_credit/Data" level under MY_IN_PATH looks
* like a leftover from an older directory layout -- confirm where
* patents_for_emma.dta actually lives.
merge 1:1 patent using "${MY_IN_PATH}/RnD_tax_credit/Data/patents_for_emma.dta"
drop if gvkey == .

* Firm-year totals of every count variable in the range bcites_pv-ncitbothext.
* The range depends on the variable order produced by the merges above.
foreach var of varlist bcites_pv-ncitbothext {
	gegen n`var' = sum(`var'), by(gvkey ayear)
}

* The n* totals are constant within gvkey x ayear, so keep one row per cell.
duplicates drop gvkey ayear, force
rename ayear year

keep gvkey year nbcites_pv-nncitbothext

* Each total as a fraction of the firm-year backward-citation total
* (missing when nbcites_pv is 0; frnbcites_pv is 1 by construction otherwise).
foreach var of varlist nbcites_pv-nncitbothext {
	gen fr`var' = `var'/nbcites_pv
}

summarize nbcites_pv-frnncitbothext

compress
save "${MY_OUT_PATH}/bcites_20231228.dta", replace
